1 Student Time Management Solution

# Load relevant libraries
library(dplyr)
library(rcompanion)
## Warning: package 'rcompanion' was built under R version 4.0.3
library(tidyverse)
library(GGally)
library(clustMixType)
## Warning: package 'clustMixType' was built under R version 4.0.3
library(arules)
library(arulesViz)
library(plotly)
library(abind)
library(timeDate)
library(rvest)

# Plot Theme
# Dark "material"-style ggplot theme added to every plot below via `+ theme_set`.
# NOTE(review): the name `theme_set` masks the ggplot2 function theme_set();
# consider renaming (e.g. dark_theme) — left as-is here because later chunks
# reference this object by name.
theme_set <- theme(legend.key = element_rect(fill="black"),
           legend.background = element_rect(color="white", fill="#263238"),
           plot.subtitle = element_text(size=6, color="white"),
           panel.background = element_rect(fill="#dddddd"),
           panel.border = element_rect(fill=NA),
           panel.grid.minor.x = element_blank(),
           panel.grid.major.x = element_blank(),
           panel.grid.major.y = element_line(color="darkgrey", linetype=2),
           panel.grid.minor.y = element_blank(),
           plot.background = element_rect(fill="#263238"),
           text = element_text(color="white"),
           axis.text = element_text(color="white"))

1.0.1 Data Set

Source https://www.kaggle.com/xiaowenlimarketing/international-student-time-management

# Load the data (assumes the CSV sits in the working directory)
original_data <- read.csv("International students Time management data.csv")
# Preview the first rows
head(original_data)
# List the variables in the data
colnames(original_data)
##  [1] "Number"      "Age"         "Gender"      "Nationality" "Program"    
##  [6] "Course"      "English"     "Academic"    "Attendance"  "X6"         
## [11] "X7"          "X8"          "X9"          "X10"         "X11"        
## [16] "X12"         "X13"         "X14"         "X15"         "X16"        
## [21] "X17"

Where X6-X17 correspond to the following survey statements: (X6) You often feel that your life is aimless, with no definite purpose. (X7) You never have trouble organizing the things you have to do. (X8) Once you’ve started an activity, you persist at it until you’ve completed it. (X9) Sometimes you feel that the things you have to do during the day just don’t seem to matter. (X10) You plan your activities from day to day. (X11) You tend to leave things to the last minute. (X12) You tend to change rather aimlessly from one activity to another during the day. (X13) You give up the things that you were planning to do just because your friend says no. (X14) You think you do enough with your time. (X15) You are easily bored with your day-to-day activities. (X16) The important interests/activities in your life tend to change frequently. (X17) You know how much time you spend on each piece of homework you do.

# Keep only the columns used for cleaning and transformation:
# drop the respondent id and the demographic/background variables.
data <- original_data %>%
    select(-Number, -Nationality, -Program, -Course,
           -English, -Academic, -Attendance)
# Construct the 'clean' function
#
# Takes a data frame with columns Age and X6..X17 (Likert answers as
# character) and returns it with:
#   * Age collapsed to an ordered factor ("18-20" < "21-25" < ">25"),
#   * blank survey answers treated as "Neither" — now applied to EVERY item;
#     the original skipped X7, X13 and X15, so a blank there silently fell
#     through the ifelse ladder and scored 1 or 5,
#   * numeric recodes X6_num..X17_num on a 1-5 scale; negatively worded
#     items are reverse-coded ("Strong Agree" = 1) — assumes a higher total
#     means better time management, TODO confirm against the questionnaire,
#   * a total `score` column, and all character columns turned into factors.
clean <- function(x){
  # Reduce factor levels: pool the sparse older brackets into ">25",
  # fold "<18" into the youngest bracket, then impose an ordering.
  x$Age <- ifelse(x$Age %in% c("26-30", ">36", "31-35"), ">25", x$Age)
  x$Age <- ifelse(x$Age == "<18", "18-20", x$Age)
  x$Age <- ordered(x$Age, levels = c("18-20", "21-25", ">25"))

  survey_items <- paste0("X", 6:17)

  # Blank answer -> "Neither" for every survey item
  for (item in survey_items) {
    x[[item]] <- ifelse(x[[item]] == "", "Neither", x[[item]])
  }

  # Items whose agreement indicates poor time management are reverse-coded
  # (same items as in the original ladders: X6, X9, X11, X12, X13, X15, X16).
  reverse_coded <- c("X6", "X9", "X11", "X12", "X13", "X15", "X16")

  # Map one answer vector to a 1-5 Likert score; anything unrecognized
  # falls through to the lowest direct score, as in the original.
  likert_score <- function(answer, reversed) {
    score <- ifelse(answer == "Strong Agree", 5,
                    ifelse(answer == "Agree", 4,
                           ifelse(answer == "Neither", 3,
                                  ifelse(answer == "Disagree", 2, 1))))
    if (reversed) 6 - score else score
  }

  # Append X6_num..X17_num in order so downstream positional selects
  # (columns 15-27) keep working.
  for (item in survey_items) {
    x[[paste0(item, "_num")]] <-
      likert_score(x[[item]], item %in% reverse_coded)
  }

  # Sum the total score across all twelve items
  x$score <- unname(rowSums(x[paste0(survey_items, "_num")]))

  # Character -> factor (base-R equivalent of mutate_if(is.character, as.factor))
  x[] <- lapply(x, function(col) if (is.character(col)) as.factor(col) else col)
  x
}

Note: double-check the meaning of the Likert coding — can we conclude that a higher total score means better time management, or worse?

# Apply the clean function into the data
data <- clean(data)
# Check the cleaned data
head(data)
# Subset the derived columns for manipulation.
# Positions 15-27 are X6_num..X17_num plus the total score — position-based
# selection is fragile if the column layout changes; TODO confirm.
data_ques <- data %>% select(c(15:27))

# Rename the variables.
# NOTE(review): this reuses the question names X6..X17 for the *numeric*
# recodes, which is easy to confuse with the factor columns in `data`.
colnames(data_ques) <- c("X6","X7","X8","X9","X10",
                         "X11","X12","X13","X14","X15",
                         "X16","X17","Score")
# Visualize the Total Score distribution.
# NOTE(review): `col` sets the bar outline colour; use `fill` if a
# light-blue fill was intended.
ggplot(data, aes(x = score))+
  geom_histogram(col = "lightblue")+
  theme_set + ggtitle("Total Score Distribution")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The total-score distribution looks acceptable (no extreme skew or obvious outliers).

# Five-number summary of the total score (mean close to median suggests
# the distribution is roughly symmetric)
summary(data_ques$Score)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   26.00   34.00   37.00   37.36   41.00   55.00

If the mean and median are close, the distribution is roughly symmetric, so we are on the right track.

# Count of respondents per age group, split by gender.
# NOTE(review): this is a dodged bar chart of sample composition, not a
# histogram, and says nothing about time-management quality by itself.
ggplot(data, aes(x = Age)) +
  geom_bar(aes(x = Age, fill = Gender), position = "dodge") +
  theme_set + ggtitle("Histogram of Age based on Gender")

Insight: students in the 21-25 age interval appear to be better at time management? (Check the earlier note about the direction of the score scale.)

# Score distribution per age group, one box per gender within each group
ggplot(data, aes(x = Age, y = score)) +
  geom_boxplot(aes(fill = Gender)) +
  theme_set + ggtitle("Score Distribution based on Gender and Age") 

1.1 Clustering with K-Means

# Drop the numeric recodes (columns 15-26) so the clustering sees the
# factor answers plus the total score
data_kmean <- data %>%
  select(-c(15:26))

# Determine the number of clusters using an elbow plot
set.seed(100)

# Total within-cluster distance of a k-prototypes model with k clusters
# (kproto handles the mixed factor/numeric columns)
wss <- function(k) {
  kproto(data_kmean, k)$tot.withinss
}

# Candidate cluster counts
k.values <- 1:10

# Extract the wss for k = 1 to 10 clusters (comment previously said 2-10)
wss_values <- map_dbl(k.values, wss)

# Actually draw the elbow plot the comments promise: choose the k where
# the curve stops dropping sharply
plot(k.values, wss_values, type = "b", pch = 19,
     xlab = "Number of clusters k",
     ylab = "Total within-cluster distance")
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875 
## 
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875 
## 
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875 
## 
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875 
## 
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875 
## 
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875 
## 
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875 
## 
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875 
## 
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875 
## 
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875

Why k = 3? Reason: pick the elbow of the within-cluster-distance curve — the smallest k after which the total within-cluster distance stops dropping sharply. (Fill in the concrete justification from the elbow values above.)

# Clustering Model
set.seed(100) 

# Fit the final k-prototypes model with 3 clusters (k chosen from the
# elbow analysis above)
model_clus <- kproto(data_kmean, 3)
## # NAs in variables:
##    Age Gender     X6     X7     X8     X9    X10    X11    X12    X13    X14 
##      0      0      0      0      0      0      0      0      0      0      0 
##    X15    X16    X17  score 
##      0      0      0      0 
## 0 observation(s) with NAs.
## 
## Estimated lambda: 40.0875
# Simulated new respondent `a` (answers in the same column order as `data`)
a <- c("21-25", "F", "Strong Agree","Strong Disagree","Strong Disagree", 
       "Strong Agree","Strong Disagree", "Strong Agree","Strong Agree",
       "Strong Agree","Disagree","Strong Disagree","Strong Disagree",
       "Strong Disagree")

# New data pre-processing:
# turn the character vector into a one-column data frame, then transpose it
# into a single observation row
a <- data.frame(a)
a <- data.frame(t(a))

# Rename the columns to match the training data
colnames(a) <- c("Age","Gender","X6","X7","X8",
                 "X9","X10","X11","X12","X13",
                 "X14","X15","X16","X17")

# Apply the same cleaning/recoding as the training data
a <- clean(a)

# kproto prediction needs at least 2 observations, so bind the new row
# with the last observation of the original data.
# (Was hard-coded as data[125,]; nrow() keeps it correct if the data grows.)
a <- rbind(data[nrow(data), ], a)

# Keep only the columns the clustering model was trained on
a_kmean <- a %>%
  select(-c(15:26))

# Predict the cluster of the new observation
set.seed(100)
test <- predict(model_clus, a_kmean)

# Combine the training cluster assignments with the predicted ones
cluster <- append(model_clus$cluster, test$cluster)
cluster <- as.factor(cluster)

# Original data + simulated input
newdata <- rbind(data, a)

# Attach the cluster labels
data_clus <- cbind(newdata, cluster)

# The simulated respondent is the last row (was hard-coded as row 127)
data_new <- data_clus[nrow(data_clus), ]

Note: avoid generic names like data1/data2 — they are uninformative for the reader.

# Jitter the points slightly so overlapping observations stay visible.
# NOTE(review): the name `jitter` masks base::jitter(); consider renaming.
set.seed(100)
jitter <- position_jitter(width = 0.1, height = 0.1)

# Plot the clustering of the original data: score vs age, coloured by cluster.
# NOTE(review): labels assume cluster 1 = "Bad", 2 = "Good", 3 = "Normal" —
# confirm this mapping against the score-by-cluster boxplot below.
ggplot(NULL, aes(x = Age, y = score))+
  geom_point(data = data_kmean, aes(col = as.factor(model_clus$cluster)), 
             position = jitter) +
  theme(legend.position = "bottom") +
  guides(fill=FALSE) +
  scale_colour_manual(name = "Cluster",
                      labels = c("Bad", "Good","Normal"),
                      values = c("red","dark green", "gold")) +
  theme_set + ggtitle("Clustering Result")

# Interactive boxplot of total score per cluster (optional; plotly version
# preferred for exploration)
boxplot_cluster <- ggplot(
  data_kmean,
  aes(x = as.factor(model_clus$cluster), y = score)
) +
  geom_boxplot()
ggplotly(boxplot_cluster)
# Plot the clustering including the new observation; shape 8 (asterisk)
# marks the simulated respondent stored in data_new.
# NOTE(review): ggtitle("") leaves the plot untitled — add one.
# Labels again assume cluster 1/2/3 = Bad/Good/Normal — confirm.
ggplot(NULL,aes(x=Age,y=score))+
  geom_point(data=data_clus , aes(col=cluster), position = jitter) +
  geom_jitter(data = data_new, aes(col=cluster), shape=8, size=3, stroke=2) +
  theme(legend.position = "none") +
  guides(fill=FALSE) +
  scale_colour_manual(name="Cluster", labels=c("Bad", "Good","Normal"),
                      values=c("red","dark green", "gold")) +
  theme_set + ggtitle("")

Note: the starred point marks the newly added observation — what does its position tell us? Also, this plot still needs a title.

1.2 Association rules

# Take only the survey results & cluster label.
# Columns 3-14 hold the original factor answers X6..X17; column 28 is the
# cluster assignment.
data_arules <- data_clus %>% 
  select(c(3:14,28))

# Recode the numeric cluster ids to descriptive labels.
# Wrap the result in factor(): ifelse() returns a character vector, which
# drops the factor class and is what triggers apriori's
# "Column(s) 13 not logical or factor" discretization warning.
data_arules$cluster <- factor(ifelse(data_arules$cluster==1, "Bad",
                                     ifelse(data_arules$cluster==2, "Good",
                                            "Normal")))
# Check data_clus
glimpse(data_clus)
## Rows: 127
## Columns: 28
## $ Age     <ord> >25, >25, >25, 21-25, 21-25, 21-25, 21-25, 21-25, 21-25, 21...
## $ Gender  <fct> M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M, M,...
## $ X6      <fct> Disagree, Strong Agree, Disagree, Disagree, Neither, Agree,...
## $ X7      <fct> Agree, Agree, Strong Agree, Disagree, Disagree, Neither, Ag...
## $ X8      <fct> Strong Agree, Neither, Agree, Agree, Neither, Agree, Agree,...
## $ X9      <fct> Neither, Disagree, Disagree, Agree, Neither, Strong Agree, ...
## $ X10     <fct> Agree, Agree, Agree, Disagree, Disagree, Disagree, Agree, A...
## $ X11     <fct> Neither, Neither, Agree, Agree, Neither, Disagree, Neither,...
## $ X12     <fct> Disagree, Disagree, Disagree, Neither, Neither, Strong Agre...
## $ X13     <fct> Strong Disagree, Strong Disagree, Strong Disagree, Neither,...
## $ X14     <fct> Strong Agree, Neither, Disagree, Neither, Neither, Strong A...
## $ X15     <fct> Neither, Agree, Strong Agree, Neither, Disagree, Strong Agr...
## $ X16     <fct> Disagree, Neither, Strong Agree, Disagree, Neither, Strong ...
## $ X17     <fct> Agree, Disagree, Disagree, Agree, Agree, Strong Agree, Neit...
## $ X6_num  <dbl> 4, 1, 4, 4, 3, 2, 4, 2, 4, 5, 5, 3, 4, 2, 4, 1, 3, 4, 4, 2,...
## $ X7_num  <dbl> 4, 4, 5, 2, 2, 3, 4, 3, 2, 4, 3, 2, 3, 4, 4, 5, 2, 2, 2, 4,...
## $ X8_num  <dbl> 5, 3, 4, 4, 3, 4, 4, 3, 1, 4, 3, 3, 3, 4, 2, 4, 4, 3, 4, 5,...
## $ X9_num  <dbl> 3, 4, 4, 2, 3, 1, 3, 2, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 4, 2,...
## $ X10_num <dbl> 4, 4, 4, 2, 2, 2, 4, 4, 2, 2, 3, 5, 2, 3, 1, 5, 3, 3, 5, 5,...
## $ X11_num <dbl> 3, 3, 2, 2, 3, 4, 3, 3, 2, 5, 3, 2, 4, 3, 4, 1, 4, 5, 4, 1,...
## $ X12_num <dbl> 4, 4, 4, 3, 3, 1, 3, 2, 2, 5, 3, 3, 4, 2, 1, 2, 4, 3, 4, 2,...
## $ X13_num <dbl> 5, 5, 5, 3, 3, 2, 5, 3, 4, 5, 3, 4, 4, 3, 5, 1, 4, 2, 2, 1,...
## $ X14_num <dbl> 5, 3, 2, 3, 3, 5, 2, 4, 4, 4, 3, 4, 2, 5, 4, 4, 3, 3, 4, 4,...
## $ X15_num <dbl> 3, 2, 1, 3, 4, 1, 5, 2, 4, 2, 3, 2, 4, 3, 3, 3, 4, 1, 4, 2,...
## $ X16_num <dbl> 4, 3, 1, 4, 3, 5, 5, 2, 4, 5, 3, 3, 2, 2, 2, 2, 4, 3, 2, 2,...
## $ X17_num <dbl> 4, 2, 2, 4, 4, 5, 3, 4, 2, 1, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4,...
## $ score   <dbl> 48, 38, 38, 36, 36, 35, 45, 34, 33, 45, 38, 38, 39, 37, 34,...
## $ cluster <fct> 2, 3, 2, 3, 3, 3, 2, 3, 1, 2, 3, 3, 2, 3, 1, 3, 2, 3, 2, 1,...

1.3 Bad Time Management

# Apply the association rules with the Apriori algorithm:
# mine rules of length 3-4 whose right-hand side is fixed to the "Bad"
# cluster, with support >= 0.05 and confidence >= 0.5.
# NOTE(review): the warning below about column 13 appears because the
# cluster column was recoded to character (ifelse drops the factor class).
mba_bad <- apriori(data_arules, 
                   parameter = list(sup = 0.05, conf = 0.5,
                                    target="rules", minlen=3, maxlen=4),
                   appearance = list(rhs= "cluster=Bad", default = "lhs"))
## Warning: Column(s) 13 not logical or factor. Applying default discretization
## (see '? discretizeDF').
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5    0.05      3
##  maxlen target  ext
##       4  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 6 
## 
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[63 item(s), 127 transaction(s)] done [0.00s].
## sorting and recoding items ... [56 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4
## Warning in apriori(data_arules, parameter = list(sup = 0.05, conf = 0.5, :
## Mining stopped (maxlen reached). Only patterns up to a length of 4 returned!
##  done [0.00s].
## writing ... [44 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].

Note: apriori warns that column 13 is not logical or factor. glimpse() shows `cluster` as a factor in data_clus, but the ifelse() recode into data_arules returns a character vector, which drops the factor class — that is why the warning appears despite what glimpse shows.

# Inspect the top 10 rules by lift.
# NOTE(review): the trailing [,c(1:5,7)] indexes inspect()'s return value,
# which is NULL here — hence the "## NULL" in the output; it can be dropped.
inspect(head(sort(mba_bad, by="lift"),10))[,c(1:5,7)]
##      lhs               rhs              support confidence   coverage     lift count
## [1]  {X10=Disagree,                                                                 
##       X17=Disagree} => {cluster=Bad} 0.07086614  1.0000000 0.07086614 3.968750     9
## [2]  {X10=Disagree,                                                                 
##       X12=Agree,                                                                    
##       X17=Disagree} => {cluster=Bad} 0.05511811  1.0000000 0.05511811 3.968750     7
## [3]  {X7=Disagree,                                                                  
##       X10=Disagree,                                                                 
##       X17=Disagree} => {cluster=Bad} 0.06299213  1.0000000 0.06299213 3.968750     8
## [4]  {X7=Disagree,                                                                  
##       X11=Agree,                                                                    
##       X17=Disagree} => {cluster=Bad} 0.05511811  1.0000000 0.05511811 3.968750     7
## [5]  {X7=Disagree,                                                                  
##       X9=Agree,                                                                     
##       X17=Disagree} => {cluster=Bad} 0.05511811  1.0000000 0.05511811 3.968750     7
## [6]  {X7=Disagree,                                                                  
##       X17=Disagree} => {cluster=Bad} 0.09448819  0.9230769 0.10236220 3.663462    12
## [7]  {X8=Disagree,                                                                  
##       X17=Disagree} => {cluster=Bad} 0.05511811  0.8750000 0.06299213 3.472656     7
## [8]  {X10=Disagree,                                                                 
##       X11=Agree}    => {cluster=Bad} 0.05511811  0.8750000 0.06299213 3.472656     7
## [9]  {X10=Disagree,                                                                 
##       X12=Agree}    => {cluster=Bad} 0.05511811  0.8750000 0.06299213 3.472656     7
## [10] {X10=Disagree,                                                                 
##       X16=Agree}    => {cluster=Bad} 0.05511811  0.8750000 0.06299213 3.472656     7
## NULL

Note: focus on the first 5 rules — they each have a confidence value of 1.0.

# Graph visualization of the top-10 "Bad" rules by lift.
# (Removed a stray trailing comma that passed an empty argument to plot().)
plot(head(sort(mba_bad, by = "lift"), 10), method = "graph")

1.4 Alternative Plots

# Keep only the rules whose confidence exceeds 0.2
subRules <- mba_bad[quality(mba_bad)$confidence > 0.2]

1.4.1 Interactive Scatter-Plot

The scatter plot shows the lift for each rule.

# plotly_arules() is deprecated (see the warning below); the supported
# replacement is plot() with the plotly engine.
plot(subRules, method = "scatterplot", engine = "plotly")
## Warning: 'plotly_arules' is deprecated.
## Use 'plot' instead.
## See help("Deprecated")
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

1.4.2 Graph-Based Visualizations

In this plot, each arrow represents a relationship between items in a rule, and the node size grows as the lift increases.

# Choose the top 10 rules by confidence and render them as an
# interactive rule graph
top10subRules <- head(subRules, n = 10, by = "confidence")
plot(top10subRules, method = "graph",  engine = "htmlwidget")

1.4.3 Individual Rule Representation

This representation is also called a Parallel Coordinates Plot. It is useful for visualizing which combinations of survey answers lead to which cluster outcome.

# Parallel-coordinates view of the ten highest-lift rules
subRules2 <- head(subRules, n = 10, by = "lift")
plot(subRules2, method = "paracoord")

Note: need to transpose/observe/gain deep understanding/making a note for each survey question towards the likert scale to make sure we read this right :)

Note: Please do the same with the following code

1.5 Good Time Management

# Same mining as mba_bad, but with the right-hand side fixed to the
# "Good" cluster: which answer combinations characterize good time managers?
mba_good <- apriori(data_arules, 
                    parameter = list(sup = 0.05, conf = 0.5,
                                     target="rules",minlen=3,maxlen=4),
                    appearance = list(rhs= "cluster=Good", default = "lhs"))
## Warning: Column(s) 13 not logical or factor. Applying default discretization
## (see '? discretizeDF').
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5    0.05      3
##  maxlen target  ext
##       4  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 6 
## 
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[63 item(s), 127 transaction(s)] done [0.00s].
## sorting and recoding items ... [56 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4
## Warning in apriori(data_arules, parameter = list(sup = 0.05, conf = 0.5, :
## Mining stopped (maxlen reached). Only patterns up to a length of 4 returned!
##  done [0.00s].
## writing ... [269 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Top 10 "Good" rules by lift (the trailing index on inspect()'s NULL
# return produces the "## NULL" below and can be dropped)
inspect(head(sort(mba_good, by="lift"),10))[,c(1:5,7)]
##      lhs                     rhs               support confidence   coverage lift count
## [1]  {X6=Strong Disagree,                                                              
##       X8=Agree}           => {cluster=Good} 0.06299213          1 0.06299213 2.54     8
## [2]  {X9=Disagree,                                                                     
##       X16=Disagree}       => {cluster=Good} 0.05511811          1 0.05511811 2.54     7
## [3]  {X12=Disagree,                                                                    
##       X15=Disagree}       => {cluster=Good} 0.06299213          1 0.06299213 2.54     8
## [4]  {X15=Disagree,                                                                    
##       X17=Neither}        => {cluster=Good} 0.06299213          1 0.06299213 2.54     8
## [5]  {X11=Disagree,                                                                    
##       X15=Disagree}       => {cluster=Good} 0.08661417          1 0.08661417 2.54    11
## [6]  {X11=Disagree,                                                                    
##       X12=Disagree}       => {cluster=Good} 0.10236220          1 0.10236220 2.54    13
## [7]  {X7=Agree,                                                                        
##       X16=Disagree}       => {cluster=Good} 0.06299213          1 0.06299213 2.54     8
## [8]  {X8=Agree,                                                                        
##       X12=Disagree,                                                                    
##       X15=Disagree}       => {cluster=Good} 0.05511811          1 0.05511811 2.54     7
## [9]  {X11=Disagree,                                                                    
##       X15=Disagree,                                                                    
##       X16=Disagree}       => {cluster=Good} 0.06299213          1 0.06299213 2.54     8
## [10] {X15=Disagree,                                                                    
##       X16=Disagree,                                                                    
##       X17=Agree}          => {cluster=Good} 0.06299213          1 0.06299213 2.54     8
## NULL
# Circular graph layout of the top-10 "Good" rules by lift
plot(head(sort(mba_good,by="lift"),10),
     method="graph",control=list(layout=igraph::in_circle()))

2 Scrape for time management tips

# Scrape time-management tips from the QuickBooks article.
# (Dropped a single-argument paste0(), which was a no-op.)
link <- "https://quickbooks.intuit.com/r/employee-management/time-management-tips/#:~:text=If%20you%20want%20to%20improve%20your%20time%20management,compare%20actual%20time%20spent%20and%20estimated%20time%20spent."
pages <- read_html(link)

# The individual tips are the article's h2 section headings
Tips <- pages %>% 
      html_nodes(".body-article .content-article h2") %>%
      html_text()

# Drop page boilerplate; keep only the tip headings.
# NOTE(review): the 3:27 window depends on the page layout — verify it
# still holds if the article changes.
Tips <- Tips[3:27]

# "or for more info you can click [here](https://quickbooks.intuit.com/r/employee-management/time-management-tips/#:~:text=If%20you%20want%20to%20improve%20your%20time%20management,compare%20actual%20time%20spent%20and%20estimated%20time%20spent.)"
# TODO: credit the article's author and source link alongside the scraped tips.